The libraries I will use:

library(data.table)
library(dplyr)
library(jpeg)
library(imager)
library(factoextra)

I read the data and apply PCA:

# The file has no header row: with read.csv()'s default header = TRUE the first
# observation was turned into column names (X1, X1.1, X42, X.198, ...) and
# silently dropped from the data. Read every line as data instead.
raw_data <- read.csv("/Users/ayberkakgun/Desktop/Musk1.csv", header = FALSE)

# Keep the two names the later chunks refer to: X1 is the bag class and
# X1.1 is the bag id (see the `labels` column names below).
names(raw_data)[1:2] <- c("X1", "X1.1")

# Columns 3-168 are the features; standardise them (zero mean, unit variance)
# so that no single feature dominates the principal components.
features <- scale(raw_data[3:168])

# Bag-level labels, kept separately under descriptive names.
labels <- raw_data[1:2]
colnames(labels) <- c("BagClass", "BagId")

# PCA on the standardised instances; str() shows the pieces of the fit.
pca <- princomp(features)
str(pca)
## List of 7
##  $ sdev    : Named num [1:166] 7.19 4.8 3.55 2.9 2.86 ...
##   ..- attr(*, "names")= chr [1:166] "Comp.1" "Comp.2" "Comp.3" "Comp.4" ...
##  $ loadings: loadings [1:166, 1:166] -0.036844 -0.06835 -0.094764 0.092619 -0.000908 ...
##   ..- attr(*, "dimnames")=List of 2
##   .. ..$ : chr [1:166] "X42" "X.198" "X.109" "X.75" ...
##   .. ..$ : chr [1:166] "Comp.1" "Comp.2" "Comp.3" "Comp.4" ...
##  $ center  : Named num [1:166] 1.60e-16 -6.24e-17 3.54e-17 1.66e-17 -6.13e-17 ...
##   ..- attr(*, "names")= chr [1:166] "X42" "X.198" "X.109" "X.75" ...
##  $ scale   : Named num [1:166] 1 1 1 1 1 1 1 1 1 1 ...
##   ..- attr(*, "names")= chr [1:166] "X42" "X.198" "X.109" "X.75" ...
##  $ n.obs   : int 475
##  $ scores  : num [1:475, 1:166] -0.787 -0.255 -1.359 -1.464 -1.439 ...
##   ..- attr(*, "dimnames")=List of 2
##   .. ..$ : NULL
##   .. ..$ : chr [1:166] "Comp.1" "Comp.2" "Comp.3" "Comp.4" ...
##  $ call    : language princomp(x = features)
##  - attr(*, "class")= chr "princomp"
# Eigenvalue, variance share and cumulative variance share per component;
# only the ten leading components are printed.
eigenvalue <- get_eigenvalue(pca)
head(eigenvalue, n = 10)
##        eigenvalue variance.percent cumulative.variance.percent
## Dim.1   51.747369        31.238880                    31.23888
## Dim.2   23.049909        13.914782                    45.15366
## Dim.3   12.628826         7.623777                    52.77744
## Dim.4    8.400432         5.071178                    57.84862
## Dim.5    8.164036         4.928469                    62.77709
## Dim.6    6.755840         4.078369                    66.85545
## Dim.7    5.364907         3.238690                    70.09414
## Dim.8    5.040095         3.042607                    73.13675
## Dim.9    3.302223         1.993488                    75.13024
## Dim.10   2.849416         1.720137                    76.85038

#a The first 5 components explain about 63% of the variability (it takes 7 components to pass 70%), and each additional component contributes less.

# Scree plot of the leading components: keep the variance percentages of the
# components whose cumulative share is still below 85%.
eig <- eigenvalue$variance.percent[eigenvalue$cumulative.variance.percent < 85]
l <- length(eig)

# seq_along() stays correct even if no component passes the filter, whereas
# 1:l would yield c(1, 0). The line colour is a constant, so it is set outside
# aes(); inside aes() "red" is treated as a one-level variable, which draws
# the line in the default hue and adds a meaningless legend.
ggplot(
  data.frame(component = seq_along(eig), variance_percent = eig),
  aes(x = component, y = variance_percent)
) +
  geom_col() +
  geom_line(colour = "red")

If we plot the first two components, we see that two dimensions are not enough to distinguish the bag labels:

# Instances on the first two principal components, coloured by X1 + 1
# (presumably X1 is a 0/1 bag class, giving palette colours 1 and 2).
plot(
  x = pca$scores[, 1],
  y = pca$scores[, 2],
  col = raw_data$X1 + 1
)
# Dotted reference lines through the origin.
abline(h = 0, lty = 3)
abline(v = 0, lty = 3)

I calculate the distance matrix on the scaled features and perform MDS on it.

# Pairwise distances between the standardised instances (dist() defaults to
# Euclidean), stored as a full square matrix.
distanceMat <- as.matrix(dist(features, upper = TRUE))

# Classical MDS; cmdscale() returns two coordinates per instance by default.
mds <- cmdscale(distanceMat)

# Same colouring as the PCA scatter plot: X1 + 1.
plot(
  x = mds[, 1],
  y = mds[, 2],
  xlab = "",
  ylab = "",
  col = raw_data$X1 + 1
)

#b Once again I perform PCA, this time on the data aggregated by bag (the mean of each feature within a bag):

# Average every column within each bag (X1.1 is the bag id). aggregate()
# prepends the grouping column, so the features shift to columns 4-169.
data_agg <- aggregate(
  raw_data,
  by = list(raw_data$X1.1),
  FUN = mean
)

# Standardise the bag-level features and run PCA on them.
features_agg <- scale(data_agg[, 4:169])
pca_agg <- prcomp(features_agg)
str(pca_agg)
## List of 5
##  $ sdev    : num [1:92] 6.65 5.62 4.69 4.21 3.1 ...
##  $ rotation: num [1:166, 1:92] -0.0536 -0.0653 -0.0607 0.0848 -0.0135 ...
##   ..- attr(*, "dimnames")=List of 2
##   .. ..$ : chr [1:166] "X42" "X.198" "X.109" "X.75" ...
##   .. ..$ : chr [1:92] "PC1" "PC2" "PC3" "PC4" ...
##  $ center  : Named num [1:166] -1.35e-16 -6.12e-17 -8.78e-17 -2.73e-17 -4.36e-17 ...
##   ..- attr(*, "names")= chr [1:166] "X42" "X.198" "X.109" "X.75" ...
##  $ scale   : Named num [1:166] 18.1 76.8 57.4 66.2 20.5 ...
##   ..- attr(*, "names")= chr [1:166] "X42" "X.198" "X.109" "X.75" ...
##  $ x       : num [1:92, 1:92] -1.63 -3.51 -4.39 -3.38 -5.25 ...
##   ..- attr(*, "dimnames")=List of 2
##   .. ..$ : NULL
##   .. ..$ : chr [1:92] "PC1" "PC2" "PC3" "PC4" ...
##  - attr(*, "class")= chr "prcomp"
# Eigenvalues and (cumulative) variance percentages of the bag-level PCA;
# only the ten leading components are printed.
eigenvalue_agg <- get_eigenvalue(pca_agg)
head(eigenvalue_agg, n = 10)
##        eigenvalue variance.percent cumulative.variance.percent
## Dim.1   44.241806        26.651690                    26.65169
## Dim.2   31.636561        19.058170                    45.70986
## Dim.3   21.961633        13.229899                    58.93976
## Dim.4   17.709779        10.668542                    69.60830
## Dim.5    9.609901         5.789097                    75.39740
## Dim.6    7.555042         4.551230                    79.94863
## Dim.7    5.042804         3.037834                    82.98646
## Dim.8    3.553406         2.140606                    85.12707
## Dim.9    3.268410         1.968922                    87.09599
## Dim.10   2.567582         1.546736                    88.64273
# Bags on the first two principal components, coloured by their mean X1 + 1.
plot(
  x = pca_agg$x[, 1],
  y = pca_agg$x[, 2],
  col = data_agg$X1 + 1
)

The same MDS operation on the aggregated data:

# Pairwise distances between the standardised bag means (dist() defaults to
# Euclidean), stored as a full square matrix.
distanceMat_agg <- as.matrix(dist(features_agg, upper = TRUE))

# Classical MDS; cmdscale() returns two coordinates per bag by default.
mds_agg <- cmdscale(distanceMat_agg)

# Same colouring as the bag-level PCA plot: X1 + 1.
plot(
  x = mds_agg[, 1],
  y = mds_agg[, 2],
  xlab = "",
  ylab = "",
  col = data_agg$X1 + 1
)

#c

#Task 2

I read my image, and plot it.

# Load the picture as a pixel array.
img <- readJPEG("/Users/ayberkakgun/Desktop/profil2.jpg")

# Open an empty 256 x 256 plotting region and paint the image across it.
plot(x = c(0, 256), y = c(0, 256), type = "n", xlab = "", ylab = "")
rasterImage(img, xleft = 0, ybottom = 0, xright = 256, ytop = 256)

I split the channels and plot them separately.

# Split the picture into its three colour channels (256 x 256 matrices).
r <- img[1:256, 1:256, 1]
g <- img[1:256, 1:256, 2]
b <- img[1:256, 1:256, 3]

# One black-to-colour ramp per channel.
red_palette <- colorRampPalette(c("black", "red"))
blue_palette <- colorRampPalette(c("black", "blue"))
green_palette <- colorRampPalette(c("black", "green"))

# image() puts matrix rows on the x axis and draws column 1 at the bottom,
# i.e. it shows a matrix rotated 90 degrees counter-clockwise. Reversing the
# rows and transposing brings a channel back to the picture's orientation.
upright <- function(channel) t(channel[nrow(channel):1, ])

# Show the channels side by side, then restore the previous layout so that
# later plots are not squeezed into one third of the device.
old_par <- par(mfrow = c(1, 3))
image(upright(r), col = red_palette(256), main = "red", axes = FALSE)
image(upright(g), col = green_palette(256), main = "green", axes = FALSE)
image(upright(b), col = blue_palette(256), main = "blue", axes = FALSE)
par(old_par)

I add noise to my image.

# Uniform noise drawn between the smallest pixel value and 10% of the largest.
minx <- min(img)
maxx <- 0.1 * max(img)
noise <- matrix(runif(256 * 256, min = minx, max = maxx), nrow = 256)

# The same noise matrix is added to every channel; values above 1 are
# capped at 1.
r2 <- pmin(r + noise, 1)
g2 <- pmin(g + noise, 1)
b2 <- pmin(b + noise, 1)

# Copy the original array and overwrite its three channels with the noisy ones.
img2 <- img
img2[, , 1] <- r2
img2[, , 2] <- g2
img2[, , 3] <- b2

# Draw the noisy picture on an empty 256 x 256 plotting region.
plot(x = c(0, 256), y = c(0, 256), type = "n", xlab = "", ylab = "")
rasterImage(img2, xleft = 0, ybottom = 0, xright = 256, ytop = 256)

I construct the patches and perform PCA on them.

imgr <- load.image("/Users/ayberkakgun/Desktop/profil2.jpg")
imgrr<-grayscale(imgr)
vectors <-matrix(nrow=53824,ncol = 625)
k<-1
for (i in 1:232){
    for(j in 1:232){
        vectors[k,]<-imgrr[i:(i+24),j:(j+24)] %>% as.vector()
        k<-k+1
    }
    }

# pca_img<-princomp(vectors)